import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn import metrics
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
%matplotlib inline
import warnings
# NOTE(review): this suppresses every warning category for the rest of the
# session, so deprecation / chained-assignment warnings raised by the cells
# below will not be shown.
warnings.filterwarnings("ignore")
# Load the dataset from 'vehicle.csv' (path is relative to the working directory).
df= pd.read_csv('vehicle.csv')
# Preview the first ten rows.
df.head(10)
| compactness | circularity | distance_circularity | radius_ratio | pr.axis_aspect_ratio | max.length_aspect_ratio | scatter_ratio | elongatedness | pr.axis_rectangularity | max.length_rectangularity | scaled_variance | scaled_variance.1 | scaled_radius_of_gyration | scaled_radius_of_gyration.1 | skewness_about | skewness_about.1 | skewness_about.2 | hollows_ratio | class | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 95 | 48.0 | 83.0 | 178.0 | 72.0 | 10 | 162.0 | 42.0 | 20.0 | 159 | 176.0 | 379.0 | 184.0 | 70.0 | 6.0 | 16.0 | 187.0 | 197 | van |
| 1 | 91 | 41.0 | 84.0 | 141.0 | 57.0 | 9 | 149.0 | 45.0 | 19.0 | 143 | 170.0 | 330.0 | 158.0 | 72.0 | 9.0 | 14.0 | 189.0 | 199 | van |
| 2 | 104 | 50.0 | 106.0 | 209.0 | 66.0 | 10 | 207.0 | 32.0 | 23.0 | 158 | 223.0 | 635.0 | 220.0 | 73.0 | 14.0 | 9.0 | 188.0 | 196 | car |
| 3 | 93 | 41.0 | 82.0 | 159.0 | 63.0 | 9 | 144.0 | 46.0 | 19.0 | 143 | 160.0 | 309.0 | 127.0 | 63.0 | 6.0 | 10.0 | 199.0 | 207 | van |
| 4 | 85 | 44.0 | 70.0 | 205.0 | 103.0 | 52 | 149.0 | 45.0 | 19.0 | 144 | 241.0 | 325.0 | 188.0 | 127.0 | 9.0 | 11.0 | 180.0 | 183 | bus |
| 5 | 107 | NaN | 106.0 | 172.0 | 50.0 | 6 | 255.0 | 26.0 | 28.0 | 169 | 280.0 | 957.0 | 264.0 | 85.0 | 5.0 | 9.0 | 181.0 | 183 | bus |
| 6 | 97 | 43.0 | 73.0 | 173.0 | 65.0 | 6 | 153.0 | 42.0 | 19.0 | 143 | 176.0 | 361.0 | 172.0 | 66.0 | 13.0 | 1.0 | 200.0 | 204 | bus |
| 7 | 90 | 43.0 | 66.0 | 157.0 | 65.0 | 9 | 137.0 | 48.0 | 18.0 | 146 | 162.0 | 281.0 | 164.0 | 67.0 | 3.0 | 3.0 | 193.0 | 202 | van |
| 8 | 86 | 34.0 | 62.0 | 140.0 | 61.0 | 7 | 122.0 | 54.0 | 17.0 | 127 | 141.0 | 223.0 | 112.0 | 64.0 | 2.0 | 14.0 | 200.0 | 208 | van |
| 9 | 93 | 44.0 | 98.0 | NaN | 62.0 | 11 | 183.0 | 36.0 | 22.0 | 146 | 202.0 | 505.0 | 152.0 | 64.0 | 4.0 | 14.0 | 195.0 | 204 | car |
df.shape
(846, 19)
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 846 entries, 0 to 845 Data columns (total 19 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 compactness 846 non-null int64 1 circularity 841 non-null float64 2 distance_circularity 842 non-null float64 3 radius_ratio 840 non-null float64 4 pr.axis_aspect_ratio 844 non-null float64 5 max.length_aspect_ratio 846 non-null int64 6 scatter_ratio 845 non-null float64 7 elongatedness 845 non-null float64 8 pr.axis_rectangularity 843 non-null float64 9 max.length_rectangularity 846 non-null int64 10 scaled_variance 843 non-null float64 11 scaled_variance.1 844 non-null float64 12 scaled_radius_of_gyration 844 non-null float64 13 scaled_radius_of_gyration.1 842 non-null float64 14 skewness_about 840 non-null float64 15 skewness_about.1 845 non-null float64 16 skewness_about.2 845 non-null float64 17 hollows_ratio 846 non-null int64 18 class 846 non-null object dtypes: float64(14), int64(4), object(1) memory usage: 125.7+ KB
df.isnull().sum()
compactness 0 circularity 5 distance_circularity 4 radius_ratio 6 pr.axis_aspect_ratio 2 max.length_aspect_ratio 0 scatter_ratio 1 elongatedness 1 pr.axis_rectangularity 3 max.length_rectangularity 0 scaled_variance 3 scaled_variance.1 2 scaled_radius_of_gyration 2 scaled_radius_of_gyration.1 4 skewness_about 6 skewness_about.1 1 skewness_about.2 1 hollows_ratio 0 class 0 dtype: int64
df.describe().T
| count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|
| compactness | 846.0 | 93.678487 | 8.234474 | 73.0 | 87.00 | 93.0 | 100.0 | 119.0 |
| circularity | 841.0 | 44.828775 | 6.152172 | 33.0 | 40.00 | 44.0 | 49.0 | 59.0 |
| distance_circularity | 842.0 | 82.110451 | 15.778292 | 40.0 | 70.00 | 80.0 | 98.0 | 112.0 |
| radius_ratio | 840.0 | 168.888095 | 33.520198 | 104.0 | 141.00 | 167.0 | 195.0 | 333.0 |
| pr.axis_aspect_ratio | 844.0 | 61.678910 | 7.891463 | 47.0 | 57.00 | 61.0 | 65.0 | 138.0 |
| max.length_aspect_ratio | 846.0 | 8.567376 | 4.601217 | 2.0 | 7.00 | 8.0 | 10.0 | 55.0 |
| scatter_ratio | 845.0 | 168.901775 | 33.214848 | 112.0 | 147.00 | 157.0 | 198.0 | 265.0 |
| elongatedness | 845.0 | 40.933728 | 7.816186 | 26.0 | 33.00 | 43.0 | 46.0 | 61.0 |
| pr.axis_rectangularity | 843.0 | 20.582444 | 2.592933 | 17.0 | 19.00 | 20.0 | 23.0 | 29.0 |
| max.length_rectangularity | 846.0 | 147.998818 | 14.515652 | 118.0 | 137.00 | 146.0 | 159.0 | 188.0 |
| scaled_variance | 843.0 | 188.631079 | 31.411004 | 130.0 | 167.00 | 179.0 | 217.0 | 320.0 |
| scaled_variance.1 | 844.0 | 439.494076 | 176.666903 | 184.0 | 318.00 | 363.5 | 587.0 | 1018.0 |
| scaled_radius_of_gyration | 844.0 | 174.709716 | 32.584808 | 109.0 | 149.00 | 173.5 | 198.0 | 268.0 |
| scaled_radius_of_gyration.1 | 842.0 | 72.447743 | 7.486190 | 59.0 | 67.00 | 71.5 | 75.0 | 135.0 |
| skewness_about | 840.0 | 6.364286 | 4.920649 | 0.0 | 2.00 | 6.0 | 9.0 | 22.0 |
| skewness_about.1 | 845.0 | 12.602367 | 8.936081 | 0.0 | 5.00 | 11.0 | 19.0 | 41.0 |
| skewness_about.2 | 845.0 | 188.919527 | 6.155809 | 176.0 | 184.00 | 188.0 | 193.0 | 206.0 |
| hollows_ratio | 846.0 | 195.632388 | 7.438797 | 181.0 | 190.25 | 197.0 | 201.0 | 211.0 |
# Split the frame by position: the last column is the label, everything
# before it is a feature.
# Explicit copies make both pieces independent of `df`, so the in-place style
# imputation that follows cannot end up operating on a view/temporary of `df`.
df_nonnumeric = df.iloc[:, -1].copy()
df_numeric = df.iloc[:, :-1].copy()
df_numeric.head()
| compactness | circularity | distance_circularity | radius_ratio | pr.axis_aspect_ratio | max.length_aspect_ratio | scatter_ratio | elongatedness | pr.axis_rectangularity | max.length_rectangularity | scaled_variance | scaled_variance.1 | scaled_radius_of_gyration | scaled_radius_of_gyration.1 | skewness_about | skewness_about.1 | skewness_about.2 | hollows_ratio | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 95 | 48.0 | 83.0 | 178.0 | 72.0 | 10 | 162.0 | 42.0 | 20.0 | 159 | 176.0 | 379.0 | 184.0 | 70.0 | 6.0 | 16.0 | 187.0 | 197 |
| 1 | 91 | 41.0 | 84.0 | 141.0 | 57.0 | 9 | 149.0 | 45.0 | 19.0 | 143 | 170.0 | 330.0 | 158.0 | 72.0 | 9.0 | 14.0 | 189.0 | 199 |
| 2 | 104 | 50.0 | 106.0 | 209.0 | 66.0 | 10 | 207.0 | 32.0 | 23.0 | 158 | 223.0 | 635.0 | 220.0 | 73.0 | 14.0 | 9.0 | 188.0 | 196 |
| 3 | 93 | 41.0 | 82.0 | 159.0 | 63.0 | 9 | 144.0 | 46.0 | 19.0 | 143 | 160.0 | 309.0 | 127.0 | 63.0 | 6.0 | 10.0 | 199.0 | 207 |
| 4 | 85 | 44.0 | 70.0 | 205.0 | 103.0 | 52 | 149.0 | 45.0 | 19.0 | 144 | 241.0 | 325.0 | 188.0 | 127.0 | 9.0 | 11.0 | 180.0 | 183 |
df_numeric.median()
compactness 93.0 circularity 44.0 distance_circularity 80.0 radius_ratio 167.0 pr.axis_aspect_ratio 61.0 max.length_aspect_ratio 8.0 scatter_ratio 157.0 elongatedness 43.0 pr.axis_rectangularity 20.0 max.length_rectangularity 146.0 scaled_variance 179.0 scaled_variance.1 363.5 scaled_radius_of_gyration 173.5 scaled_radius_of_gyration.1 71.5 skewness_about 6.0 skewness_about.1 11.0 skewness_about.2 188.0 hollows_ratio 197.0 dtype: float64
# Impute missing values with the median of their own column.
# DataFrame.fillna accepts a Series keyed by column name, so a single call
# covers every column. Reassigning the result replaces the chained
# `df_numeric[col].fillna(..., inplace=True)` pattern, which may update a
# temporary object rather than `df_numeric` itself.
df_numeric = df_numeric.fillna(df_numeric.median())
# Skewness of the columns of `df`; numeric_only=True leaves out the string
# label column instead of relying on pandas to drop it implicitly.
df.skew(numeric_only=True)
compactness 0.381271 circularity 0.261809 distance_circularity 0.106585 radius_ratio 0.394978 pr.axis_aspect_ratio 3.830362 max.length_aspect_ratio 6.778394 scatter_ratio 0.607271 elongatedness 0.047847 pr.axis_rectangularity 0.770889 max.length_rectangularity 0.256359 scaled_variance 0.651598 scaled_variance.1 0.842034 scaled_radius_of_gyration 0.279317 scaled_radius_of_gyration.1 2.083496 skewness_about 0.776519 skewness_about.1 0.688017 skewness_about.2 0.249321 hollows_ratio -0.226341 dtype: float64
from sklearn.preprocessing import PowerTransformer

# Reduce the skew of the features with a power transform (default settings).
pt = PowerTransformer()
# fit_transform() fits and transforms in one step, so no separate fit() call
# is needed. Carrying over the column labels and the row index keeps the
# result aligned with the label Series it is concatenated with afterwards.
orig_df = pd.DataFrame(
    pt.fit_transform(df_numeric),
    columns=df_numeric.columns,
    index=df_numeric.index,
)
orig_df.head()
| compactness | circularity | distance_circularity | radius_ratio | pr.axis_aspect_ratio | max.length_aspect_ratio | scatter_ratio | elongatedness | pr.axis_rectangularity | max.length_rectangularity | scaled_variance | scaled_variance.1 | scaled_radius_of_gyration | scaled_radius_of_gyration.1 | skewness_about | skewness_about.1 | skewness_about.2 | hollows_ratio | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.250889 | 0.580135 | 0.097253 | 0.349942 | 1.460002 | 0.747156 | -0.041313 | 0.163051 | -0.019052 | 0.792459 | -0.293757 | -0.118211 | 0.349953 | -0.209612 | 0.148898 | 0.535243 | -0.261662 | 0.133806 |
| 1 | -0.247957 | -0.576832 | 0.160094 | -0.816888 | -0.648789 | 0.435588 | -0.511247 | 0.539220 | -0.525853 | -0.284310 | -0.522119 | -0.504364 | -0.461911 | 0.117730 | 0.681443 | 0.342020 | 0.071166 | 0.414230 |
| 2 | 1.227603 | 0.873652 | 1.471555 | 1.180118 | 0.793256 | 0.747156 | 1.155070 | -1.152131 | 1.045385 | 0.729773 | 1.114092 | 1.149019 | 1.341615 | 0.268943 | 1.379928 | -0.215881 | -0.093673 | -0.003447 |
| 3 | 0.007051 | -0.576832 | 0.034093 | -0.220848 | 0.383072 | 0.435588 | -0.712978 | 0.662983 | -0.525853 | -0.284310 | -0.935944 | -0.695261 | -1.574168 | -1.686670 | 0.148898 | -0.093508 | 1.561779 | 1.617513 |
| 4 | -1.088077 | -0.053863 | -0.750785 | 1.079043 | 3.209391 | 3.973288 | -0.511247 | 0.539220 | -0.525853 | -0.212040 | 1.523408 | -0.548251 | 0.467024 | 3.312080 | 0.681443 | 0.022763 | -1.532626 | -1.616862 |
orig_df.skew()
compactness 0.020835 circularity 0.009902 distance_circularity -0.029340 radius_ratio -0.006298 pr.axis_aspect_ratio -0.099316 max.length_aspect_ratio -0.183269 scatter_ratio 0.049224 elongatedness -0.043290 pr.axis_rectangularity 0.124225 max.length_rectangularity 0.011789 scaled_variance 0.024486 scaled_variance.1 0.046460 scaled_radius_of_gyration -0.009842 scaled_radius_of_gyration.1 0.001559 skewness_about -0.086317 skewness_about.1 -0.096120 skewness_about.2 0.019008 hollows_ratio -0.040011 dtype: float64
# Re-attach the label column to the transformed features. concat(axis=1)
# places the two objects side by side and aligns their rows on the index.
df= pd.concat([orig_df,df_nonnumeric], axis=1)
df.head()
| compactness | circularity | distance_circularity | radius_ratio | pr.axis_aspect_ratio | max.length_aspect_ratio | scatter_ratio | elongatedness | pr.axis_rectangularity | max.length_rectangularity | scaled_variance | scaled_variance.1 | scaled_radius_of_gyration | scaled_radius_of_gyration.1 | skewness_about | skewness_about.1 | skewness_about.2 | hollows_ratio | class | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.250889 | 0.580135 | 0.097253 | 0.349942 | 1.460002 | 0.747156 | -0.041313 | 0.163051 | -0.019052 | 0.792459 | -0.293757 | -0.118211 | 0.349953 | -0.209612 | 0.148898 | 0.535243 | -0.261662 | 0.133806 | van |
| 1 | -0.247957 | -0.576832 | 0.160094 | -0.816888 | -0.648789 | 0.435588 | -0.511247 | 0.539220 | -0.525853 | -0.284310 | -0.522119 | -0.504364 | -0.461911 | 0.117730 | 0.681443 | 0.342020 | 0.071166 | 0.414230 | van |
| 2 | 1.227603 | 0.873652 | 1.471555 | 1.180118 | 0.793256 | 0.747156 | 1.155070 | -1.152131 | 1.045385 | 0.729773 | 1.114092 | 1.149019 | 1.341615 | 0.268943 | 1.379928 | -0.215881 | -0.093673 | -0.003447 | car |
| 3 | 0.007051 | -0.576832 | 0.034093 | -0.220848 | 0.383072 | 0.435588 | -0.712978 | 0.662983 | -0.525853 | -0.284310 | -0.935944 | -0.695261 | -1.574168 | -1.686670 | 0.148898 | -0.093508 | 1.561779 | 1.617513 | van |
| 4 | -1.088077 | -0.053863 | -0.750785 | 1.079043 | 3.209391 | 3.973288 | -0.511247 | 0.539220 | -0.525853 | -0.212040 | 1.523408 | -0.548251 | 0.467024 | 3.312080 | 0.681443 | 0.022763 | -1.532626 | -1.616862 | bus |
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 846 entries, 0 to 845 Data columns (total 19 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 compactness 846 non-null float64 1 circularity 846 non-null float64 2 distance_circularity 846 non-null float64 3 radius_ratio 846 non-null float64 4 pr.axis_aspect_ratio 846 non-null float64 5 max.length_aspect_ratio 846 non-null float64 6 scatter_ratio 846 non-null float64 7 elongatedness 846 non-null float64 8 pr.axis_rectangularity 846 non-null float64 9 max.length_rectangularity 846 non-null float64 10 scaled_variance 846 non-null float64 11 scaled_variance.1 846 non-null float64 12 scaled_radius_of_gyration 846 non-null float64 13 scaled_radius_of_gyration.1 846 non-null float64 14 skewness_about 846 non-null float64 15 skewness_about.1 846 non-null float64 16 skewness_about.2 846 non-null float64 17 hollows_ratio 846 non-null float64 18 class 846 non-null object dtypes: float64(18), object(1) memory usage: 125.7+ KB
sns.pairplot(df, hue = 'class')
<seaborn.axisgrid.PairGrid at 0x1619037d6a0>
# Pairwise correlations between the numeric feature columns. The string
# `class` column is excluded explicitly because DataFrame.corr() raises on
# non-numeric columns in pandas >= 2.0 instead of silently skipping them.
df.select_dtypes(include="number").corr()
| compactness | circularity | distance_circularity | radius_ratio | pr.axis_aspect_ratio | max.length_aspect_ratio | scatter_ratio | elongatedness | pr.axis_rectangularity | max.length_rectangularity | scaled_variance | scaled_variance.1 | scaled_radius_of_gyration | scaled_radius_of_gyration.1 | skewness_about | skewness_about.1 | skewness_about.2 | hollows_ratio | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| compactness | 1.000000 | 0.648513 | 0.777662 | 0.707969 | 0.161585 | 0.359017 | 0.771602 | -0.784543 | 0.776425 | 0.654525 | 0.741832 | 0.775228 | 0.543655 | -0.271115 | 0.197084 | 0.143729 | 0.364105 | 0.390185 |
| circularity | 0.648513 | 1.000000 | 0.780452 | 0.614104 | 0.189088 | 0.458461 | 0.827850 | -0.816120 | 0.828516 | 0.962441 | 0.792711 | 0.821620 | 0.932274 | 0.105080 | 0.138196 | -0.030641 | -0.078661 | 0.029405 |
| distance_circularity | 0.777662 | 0.780452 | 1.000000 | 0.786261 | 0.233360 | 0.554384 | 0.914529 | -0.911667 | 0.911610 | 0.766775 | 0.884154 | 0.913681 | 0.690531 | -0.205292 | 0.084505 | 0.241224 | 0.192830 | 0.324180 |
| radius_ratio | 0.707969 | 0.614104 | 0.786261 | 1.000000 | 0.685795 | 0.485509 | 0.777654 | -0.804529 | 0.761803 | 0.567679 | 0.820374 | 0.790084 | 0.525666 | -0.293409 | 0.031756 | 0.157922 | 0.449663 | 0.492371 |
| pr.axis_aspect_ratio | 0.161585 | 0.189088 | 0.233360 | 0.685795 | 1.000000 | 0.308149 | 0.207715 | -0.241712 | 0.189232 | 0.144730 | 0.275515 | 0.225100 | 0.133556 | -0.196320 | -0.051683 | -0.026461 | 0.392798 | 0.403622 |
| max.length_aspect_ratio | 0.359017 | 0.458461 | 0.554384 | 0.485509 | 0.308149 | 1.000000 | 0.413294 | -0.390265 | 0.416005 | 0.538460 | 0.430869 | 0.401119 | 0.326457 | -0.197144 | 0.026354 | 0.112037 | 0.112418 | 0.357824 |
| scatter_ratio | 0.771602 | 0.827850 | 0.914529 | 0.777654 | 0.207715 | 0.413294 | 1.000000 | -0.993486 | 0.986466 | 0.791645 | 0.959768 | 0.994745 | 0.773461 | -0.017400 | 0.047472 | 0.189254 | 0.071914 | 0.151573 |
| elongatedness | -0.784543 | -0.816120 | -0.911667 | -0.804529 | -0.241712 | -0.390265 | -0.993486 | 1.000000 | -0.981512 | -0.773461 | -0.963479 | -0.996271 | -0.760251 | 0.055649 | -0.046397 | -0.172487 | -0.134561 | -0.195126 |
| pr.axis_rectangularity | 0.776425 | 0.828516 | 0.911610 | 0.761803 | 0.189232 | 0.416005 | 0.986466 | -0.981512 | 1.000000 | 0.798061 | 0.944046 | 0.984235 | 0.772019 | -0.013099 | 0.054186 | 0.192377 | 0.055813 | 0.142379 |
| max.length_rectangularity | 0.654525 | 0.962441 | 0.766775 | 0.567679 | 0.144730 | 0.538460 | 0.791645 | -0.773461 | 0.798061 | 1.000000 | 0.748092 | 0.782676 | 0.871857 | 0.083790 | 0.127713 | -0.010877 | -0.070827 | 0.064959 |
| scaled_variance | 0.741832 | 0.792711 | 0.884154 | 0.820374 | 0.275515 | 0.430869 | 0.959768 | -0.963479 | 0.944046 | 0.748092 | 1.000000 | 0.962269 | 0.756536 | 0.045770 | 0.016167 | 0.176640 | 0.086126 | 0.126152 |
| scaled_variance.1 | 0.775228 | 0.821620 | 0.913681 | 0.790084 | 0.225100 | 0.401119 | 0.994745 | -0.996271 | 0.984235 | 0.782676 | 0.962269 | 1.000000 | 0.766812 | -0.034191 | 0.045544 | 0.178169 | 0.102986 | 0.172951 |
| scaled_radius_of_gyration | 0.543655 | 0.932274 | 0.690531 | 0.525666 | 0.133556 | 0.326457 | 0.773461 | -0.760251 | 0.772019 | 0.871857 | 0.756536 | 0.766812 | 1.000000 | 0.254500 | 0.171176 | -0.069807 | -0.210354 | -0.135990 |
| scaled_radius_of_gyration.1 | -0.271115 | 0.105080 | -0.205292 | -0.293409 | -0.196320 | -0.197144 | -0.017400 | 0.055649 | -0.013099 | 0.083790 | 0.045770 | -0.034191 | 0.254500 | 1.000000 | -0.078408 | -0.101010 | -0.846691 | -0.889320 |
| skewness_about | 0.197084 | 0.138196 | 0.084505 | 0.031756 | -0.051683 | 0.026354 | 0.047472 | -0.046397 | 0.054186 | 0.127713 | 0.016167 | 0.045544 | 0.171176 | -0.078408 | 1.000000 | -0.032580 | 0.097731 | 0.070006 |
| skewness_about.1 | 0.143729 | -0.030641 | 0.241224 | 0.157922 | -0.026461 | 0.112037 | 0.189254 | -0.172487 | 0.192377 | -0.010877 | 0.176640 | 0.178169 | -0.069807 | -0.101010 | -0.032580 | 1.000000 | 0.069218 | 0.175997 |
| skewness_about.2 | 0.364105 | -0.078661 | 0.192830 | 0.449663 | 0.392798 | 0.112418 | 0.071914 | -0.134561 | 0.055813 | -0.070827 | 0.086126 | 0.102986 | -0.210354 | -0.846691 | 0.097731 | 0.069218 | 1.000000 | 0.901292 |
| hollows_ratio | 0.390185 | 0.029405 | 0.324180 | 0.492371 | 0.403622 | 0.357824 | 0.151573 | -0.195126 | 0.142379 | 0.064959 | 0.126152 | 0.172951 | -0.135990 | -0.889320 | 0.070006 | 0.175997 | 0.901292 | 1.000000 |
# Features are every column except the last; the last column is the label.
X, y = df.iloc[:, :-1], df.iloc[:, -1]

# Hold out 30% of the rows for evaluation; the fixed random_state makes the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, random_state=1
)

# Support-vector classifier with scikit-learn's default hyper-parameters,
# fitted on the training portion only.
clf = svm.SVC()
clf.fit(X_train, y_train)
SVC()
clf.score(X_test, y_test)
0.9566929133858267
y_pred = clf.predict(X_test)
accuracy_score(y_test, y_pred)
0.9566929133858267
pd.crosstab(y_test, y_pred)
| col_0 | bus | car | van |
|---|---|---|---|
| class | |||
| bus | 59 | 0 | 0 |
| car | 1 | 128 | 4 |
| van | 4 | 2 | 56 |
print(metrics.classification_report(y_test, y_pred))
precision recall f1-score support
bus 0.92 1.00 0.96 59
car 0.98 0.96 0.97 133
van 0.93 0.90 0.92 62
accuracy 0.96 254
macro avg 0.95 0.96 0.95 254
weighted avg 0.96 0.96 0.96 254
# Features (all columns but the last) and labels (last column) for the PCA run.
X1 = df.iloc[:, :-1]
y1 = df.iloc[:, -1]

# Standardise each feature before computing the covariance matrix.
sc = StandardScaler()
X_std = sc.fit_transform(X1)

# np.cov expects one variable per row, hence the transpose.
cov_matrix = np.cov(X_std.T)

# Interpolate the matrix into the message. Passing it as a second print()
# argument leaves the "%s" placeholder in the output verbatim.
print(f'Covariance Matrix \n{cov_matrix}')
Covariance Matrix %s [[ 1.00118343 0.64928071 0.77858213 0.70880729 0.16177611 0.35944219 0.77251518 -0.78547172 0.77734376 0.65530004 0.7427099 0.77614515 0.54429887 -0.27143577 0.19731709 0.14389942 0.36453607 0.39064704] [ 0.64928071 1.00118343 0.78137544 0.61483087 0.1893121 0.45900331 0.82882941 -0.81708621 0.82949691 0.96358033 0.79364869 0.82259206 0.93337754 0.1052041 0.13835956 -0.0306774 -0.0787538 0.02943942] [ 0.77858213 0.78137544 1.00118343 0.78719134 0.23363637 0.55504022 0.91561086 -0.91274554 0.91268863 0.76768225 0.88520049 0.91476224 0.69134771 -0.20553513 0.08460481 0.24150986 0.19305789 0.32456351] [ 0.70880729 0.61483087 0.78719134 1.00118343 0.68660665 0.48608324 0.77857472 -0.80548087 0.76270496 0.56835048 0.8213446 0.79101898 0.52628834 -0.29375624 0.03179368 0.1581088 0.45019476 0.49295344] [ 0.16177611 0.1893121 0.23363637 0.68660665 1.00118343 0.30851409 0.20796094 -0.24199761 0.18945636 0.14490169 0.27584132 0.22536673 0.1337144 -0.19655254 -0.0517439 -0.02649184 0.39326274 0.40409996] [ 0.35944219 0.45900331 0.55504022 0.48608324 0.30851409 1.00118343 0.41378358 -0.39072676 0.41649714 0.53909765 0.43137878 0.40159375 0.32684376 -0.1973776 0.02638484 0.11216958 0.11255057 0.35824721] [ 0.77251518 0.82882941 0.91561086 0.77857472 0.20796094 0.41378358 1.00118343 -0.99466158 0.98763363 0.79258191 0.9609034 0.99592191 0.77437624 -0.01742101 0.04752839 0.1894784 0.07199898 0.15175228] [-0.78547172 -0.81708621 -0.91274554 -0.80548087 -0.24199761 -0.39072676 -0.99466158 1.00118343 -0.98267372 -0.77437648 -0.96461898 -0.99745031 -0.76115096 0.05571512 -0.04645186 -0.17269111 -0.13471998 -0.19535697] [ 0.77734376 0.82949691 0.91268863 0.76270496 0.18945636 0.41649714 0.98763363 -0.98267372 1.00118343 0.7990059 0.94516304 0.98539989 0.77293273 -0.01311419 0.05424988 0.19260506 0.05587928 0.1425479 ] [ 0.65530004 0.96358033 0.76768225 0.56835048 0.14490169 0.53909765 0.79258191 -0.77437648 0.7990059 1.00118343 0.74897684 0.78360226 0.87288868 
0.08388954 0.12786409 -0.01088969 -0.07091129 0.06503559] [ 0.7427099 0.79364869 0.88520049 0.8213446 0.27584132 0.43137878 0.9609034 -0.96461898 0.94516304 0.74897684 1.00118343 0.96340814 0.75743093 0.04582379 0.01618583 0.176849 0.0862275 0.12630146] [ 0.77614515 0.82259206 0.91476224 0.79101898 0.22536673 0.40159375 0.99592191 -0.99745031 0.98539989 0.78360226 0.96340814 1.00118343 0.76771974 -0.03423153 0.04559762 0.17837992 0.10310782 0.17315527] [ 0.54429887 0.93337754 0.69134771 0.52628834 0.1337144 0.32684376 0.77437624 -0.76115096 0.77293273 0.87288868 0.75743093 0.76771974 1.00118343 0.25480132 0.17137877 -0.06989 -0.21060282 -0.13615104] [-0.27143577 0.1052041 -0.20553513 -0.29375624 -0.19655254 -0.1973776 -0.01742101 0.05571512 -0.01311419 0.08388954 0.04582379 -0.03423153 0.25480132 1.00118343 -0.07850087 -0.10112969 -0.84769285 -0.89037257] [ 0.19731709 0.13835956 0.08460481 0.03179368 -0.0517439 0.02638484 0.04752839 -0.04645186 0.05424988 0.12786409 0.01618583 0.04559762 0.17137877 -0.07850087 1.00118343 -0.03261896 0.09784631 0.07008885] [ 0.14389942 -0.0306774 0.24150986 0.1581088 -0.02649184 0.11216958 0.1894784 -0.17269111 0.19260506 -0.01088969 0.176849 0.17837992 -0.06989 -0.10112969 -0.03261896 1.00118343 0.06929987 0.17620544] [ 0.36453607 -0.0787538 0.19305789 0.45019476 0.39326274 0.11255057 0.07199898 -0.13471998 0.05587928 -0.07091129 0.0862275 0.10310782 -0.21060282 -0.84769285 0.09784631 0.06929987 1.00118343 0.90235828] [ 0.39064704 0.02943942 0.32456351 0.49295344 0.40409996 0.35824721 0.15175228 -0.19535697 0.1425479 0.06503559 0.12630146 0.17315527 -0.13615104 -0.89037257 0.07008885 0.17620544 0.90235828 1.00118343]]
# Eigen-decomposition of the covariance matrix. Column i of `eigenvectors`
# is the eigenvector belonging to eigenvalues[i].
# NOTE(review): cov_matrix is symmetric, so np.linalg.eigh would also apply
# here; np.linalg.eig is kept so the ordering of the results stays unchanged.
eigenvalues, eigenvectors = np.linalg.eig(cov_matrix)

# Interpolate the arrays into the messages. Passing them as extra print()
# arguments leaves the "%s" placeholder in the output verbatim.
print(f'Eigen Vectors \n{eigenvectors}')
print(f'\n Eigen Values \n{eigenvalues}')
Eigen Vectors %s [[-2.65249806e-01 -1.03022012e-01 2.41878917e-01 -1.68212796e-02 -1.48092118e-01 -5.94202136e-02 1.61229043e-01 -8.27249133e-01 -2.58635515e-01 -1.11120054e-01 -1.43186585e-01 -8.28627981e-02 -7.26179018e-02 -9.62242849e-02 -4.81204894e-02 3.01778484e-02 -4.75830215e-03 7.25499175e-03] [-2.83739997e-01 1.55767519e-01 -5.11666331e-03 -2.10371203e-01 8.27583413e-02 -1.03537138e-01 -4.09070451e-01 -2.66692189e-02 -8.33878765e-03 7.15939886e-02 4.64365455e-02 2.00194517e-01 1.07243361e-01 -5.16262686e-01 5.72840539e-01 1.03747196e-01 -2.91190097e-02 -1.05685825e-02] [-3.04039660e-01 -3.62756689e-02 7.30756307e-02 9.11467591e-02 8.15573542e-02 -7.43393903e-02 1.15297741e-01 1.83306358e-01 -3.74158779e-01 -2.76833875e-01 7.50965614e-01 -1.67097117e-01 -1.49596638e-01 1.89889841e-02 3.61053590e-02 -2.78687230e-02 8.49468526e-03 -1.13446286e-02] [-2.74143941e-01 -1.88579602e-01 -2.50161489e-01 2.92723167e-03 -8.66319967e-02 2.49671105e-01 9.85828727e-02 -4.62828791e-02 -2.13286630e-02 -2.35389193e-01 -1.87181840e-02 2.81435979e-01 5.71069770e-01 3.33222855e-01 2.28797987e-01 -3.45706449e-01 -2.56958203e-03 2.33720809e-02] [-1.04195716e-01 -2.35493808e-01 -6.21822552e-01 -1.71347341e-01 6.14117485e-03 5.09172018e-01 -1.28469587e-01 -5.57417052e-02 -2.56502633e-01 1.61656982e-01 -1.44759084e-02 -7.94355783e-02 -2.78218773e-01 -1.29023261e-01 -1.47161022e-01 1.56419453e-01 -3.57653719e-03 -1.10569840e-02] [-1.71617997e-01 -1.00205131e-01 -1.97812492e-01 -1.07865539e-01 7.77470493e-01 -2.22541002e-01 3.95770363e-01 -3.60789181e-02 1.56742463e-01 -7.58563548e-02 -1.79499328e-01 3.45267306e-02 -1.59925512e-01 -1.16669863e-02 7.31628466e-02 -5.11752360e-02 1.35208987e-02 1.48123343e-02] [-3.11894600e-01 6.25928350e-02 3.77814890e-02 1.17105313e-01 -1.11904043e-01 -2.12654014e-03 1.17360308e-01 1.58479902e-01 -5.98079998e-04 1.99553125e-01 -1.53314918e-01 -5.47931719e-02 -7.96429394e-02 -1.14694428e-01 -7.82110802e-02 -2.90108604e-01 -2.68058316e-01 
-7.64384611e-01] [ 3.12501845e-01 -3.49218209e-02 -2.80012452e-02 -1.04918257e-01 1.64255501e-01 -8.70690555e-03 -1.05623879e-01 -1.53526772e-01 -6.71882382e-02 -1.65717947e-01 1.24704245e-01 5.60312141e-02 1.07867560e-01 1.61356773e-01 4.40789132e-02 2.48698472e-01 -8.09360912e-01 -1.42656483e-01] [-3.10107352e-01 6.86945400e-02 5.36072098e-02 1.15259740e-01 -9.51299321e-02 -1.33933958e-02 1.12706693e-01 1.23631472e-01 -8.73177256e-02 2.57149600e-01 -1.81787124e-01 -9.05204800e-02 -1.09428357e-01 5.27499115e-01 3.78346077e-01 5.45288179e-01 1.24450218e-03 5.66774283e-03] [-2.75756169e-01 1.41079995e-01 1.77903514e-02 -2.06055603e-01 1.98091375e-01 -1.89854559e-01 -3.68938048e-01 -1.75738029e-01 4.82272218e-02 4.30702886e-01 2.41037351e-01 2.27439863e-01 9.98227668e-02 3.36621138e-01 -4.38728528e-01 -6.55216577e-02 3.12646668e-02 1.74664602e-02] [-3.05662231e-01 6.13597246e-02 -5.41714358e-02 1.19085663e-01 -1.11925478e-01 7.19007891e-02 2.06212530e-01 5.83492139e-02 3.73128658e-01 -2.30475022e-01 8.53938270e-02 2.29644351e-01 2.22819069e-01 -2.75225859e-01 -3.88118080e-01 5.40606731e-01 -2.58420584e-02 -2.71164327e-02] [-3.12154429e-01 4.94496819e-02 3.08405978e-02 1.10519172e-01 -1.39948022e-01 2.23898254e-03 1.13704640e-01 1.64553402e-01 4.51769257e-02 1.93787554e-01 -1.15036192e-01 -7.79965221e-02 -1.09003427e-01 -1.41164556e-01 -7.79128557e-02 -2.67335789e-01 -5.18284330e-01 6.24878921e-01] [-2.58464283e-01 2.35717337e-01 6.86194888e-05 -2.21617995e-01 2.31352459e-03 5.93323082e-03 -4.13924835e-01 1.09529528e-01 9.13331285e-02 -6.04624532e-01 -3.06478343e-01 -2.93737636e-01 -1.71419057e-01 2.00037262e-01 -1.37304989e-01 -5.23628433e-02 -5.07417276e-03 -2.22617488e-03] [ 3.59699918e-02 4.95044359e-01 -1.89384724e-01 4.70017592e-02 1.55667117e-02 2.76022427e-01 1.26219421e-01 -3.21426244e-01 4.57390825e-01 1.03285531e-01 3.03376996e-01 -3.93194435e-01 2.01133538e-02 5.84681415e-02 1.95309208e-01 -1.04337837e-01 -5.10454494e-03 -2.21290869e-02] [-3.27733460e-02 
-2.16280242e-02 5.81746175e-01 -4.93016639e-01 1.69689325e-01 5.76471747e-01 1.71064648e-01 1.36688228e-01 1.22038034e-02 7.47046586e-02 1.37513605e-02 1.87030018e-02 3.29445546e-02 -1.29467737e-02 -1.46432022e-02 1.21956086e-02 3.46209019e-03 2.54272115e-03] [-5.16845687e-02 -8.90837542e-02 2.35988151e-01 7.04782145e-01 3.86938950e-01 3.74619186e-01 -3.59021783e-01 -8.17792526e-02 2.85061791e-02 -2.44150397e-02 -5.30695583e-02 6.65788615e-02 -4.79393685e-02 -9.89681605e-03 -1.84352801e-03 -1.03510969e-02 6.63531149e-03 1.14113489e-02] [-5.80127645e-02 -5.08765049e-01 7.44552697e-02 -7.99226140e-02 -2.19311200e-01 -2.89307619e-02 -1.09060928e-01 -8.17955791e-02 5.50010905e-01 -5.82258827e-02 2.01869673e-01 1.99287806e-01 -4.62331800e-01 1.33204122e-01 1.73797165e-01 -8.29881440e-02 -3.95143520e-02 -4.27211035e-02] [-8.94857243e-02 -5.08821208e-01 5.42024985e-02 -4.23570035e-02 7.61489140e-02 -1.33819526e-01 -1.54907594e-01 4.56562502e-02 1.59606312e-01 1.52320089e-01 1.26861687e-02 -6.54339122e-01 4.28220630e-01 -9.02972146e-02 -3.25694781e-02 9.38552664e-02 1.96927259e-04 -1.34815346e-02]] Eigen Values %s [9.64104985e+00 3.29869627e+00 1.17183871e+00 1.19739923e+00 8.59916998e-01 7.84115472e-01 3.72629538e-01 2.56895019e-01 1.26797293e-01 9.18091536e-02 6.84368623e-02 5.46198698e-02 3.70337677e-02 2.07847621e-02 1.88231087e-02 1.32661767e-02 2.77910098e-03 4.41059005e-03]
# Build a list of (eigenvalue, eigenvector) pairs. Eigenvector i is column i
# of `eigenvectors`, i.e. row i of its transpose.
eig_pairs = list(zip(eigenvalues, eigenvectors.T))

# Order by eigenvalue only, largest first. Sorting the bare tuples would fall
# back to comparing the ndarray eigenvectors whenever two eigenvalues are
# equal, and that comparison raises ValueError.
eig_pairs.sort(key=lambda pair: pair[0], reverse=True)
print(eig_pairs)

# Unzip the sorted pairs into parallel lists.
eigvalues_sorted = [value for value, _ in eig_pairs]
eigvectors_sorted = [vector for _, vector in eig_pairs]
print('Eigenvalues in descending order: \n%s' %eigvalues_sorted)
[(9.64104985312631, array([-0.26524981, -0.28374 , -0.30403966, -0.27414394, -0.10419572,
-0.171618 , -0.3118946 , 0.31250184, -0.31010735, -0.27575617,
-0.30566223, -0.31215443, -0.25846428, 0.03596999, -0.03277335,
-0.05168457, -0.05801276, -0.08948572])), (3.298696269122248, array([-0.10302201, 0.15576752, -0.03627567, -0.1885796 , -0.23549381,
-0.10020513, 0.06259283, -0.03492182, 0.06869454, 0.14107999,
0.06135972, 0.04944968, 0.23571734, 0.49504436, -0.02162802,
-0.08908375, -0.50876505, -0.50882121])), (1.1973992330834702, array([-0.01682128, -0.2103712 , 0.09114676, 0.00292723, -0.17134734,
-0.10786554, 0.11710531, -0.10491826, 0.11525974, -0.2060556 ,
0.11908566, 0.11051917, -0.221618 , 0.04700176, -0.49301664,
0.70478214, -0.07992261, -0.042357 ])), (1.1718387078485022, array([ 2.41878917e-01, -5.11666331e-03, 7.30756307e-02, -2.50161489e-01,
-6.21822552e-01, -1.97812492e-01, 3.77814890e-02, -2.80012452e-02,
5.36072098e-02, 1.77903514e-02, -5.41714358e-02, 3.08405978e-02,
6.86194888e-05, -1.89384724e-01, 5.81746175e-01, 2.35988151e-01,
7.44552697e-02, 5.42024985e-02])), (0.8599169977079716, array([-0.14809212, 0.08275834, 0.08155735, -0.086632 , 0.00614117,
0.77747049, -0.11190404, 0.1642555 , -0.09512993, 0.19809138,
-0.11192548, -0.13994802, 0.00231352, 0.01556671, 0.16968933,
0.38693895, -0.2193112 , 0.07614891])), (0.7841154723760503, array([-0.05942021, -0.10353714, -0.07433939, 0.24967111, 0.50917202,
-0.222541 , -0.00212654, -0.00870691, -0.0133934 , -0.18985456,
0.07190079, 0.00223898, 0.00593323, 0.27602243, 0.57647175,
0.37461919, -0.02893076, -0.13381953])), (0.3726295376446502, array([ 0.16122904, -0.40907045, 0.11529774, 0.09858287, -0.12846959,
0.39577036, 0.11736031, -0.10562388, 0.11270669, -0.36893805,
0.20621253, 0.11370464, -0.41392483, 0.12621942, 0.17106465,
-0.35902178, -0.10906093, -0.15490759])), (0.2568950194548444, array([-0.82724913, -0.02666922, 0.18330636, -0.04628288, -0.05574171,
-0.03607892, 0.1584799 , -0.15352677, 0.12363147, -0.17573803,
0.05834921, 0.1645534 , 0.10952953, -0.32142624, 0.13668823,
-0.08177925, -0.08179558, 0.04565625])), (0.1267972928557123, array([-0.25863551, -0.00833879, -0.37415878, -0.02132866, -0.25650263,
0.15674246, -0.00059808, -0.06718824, -0.08731773, 0.04822722,
0.37312866, 0.04517693, 0.09133313, 0.45739083, 0.0122038 ,
0.02850618, 0.55001091, 0.15960631])), (0.09180915364321345, array([-0.11112005, 0.07159399, -0.27683387, -0.23538919, 0.16165698,
-0.07585635, 0.19955312, -0.16571795, 0.2571496 , 0.43070289,
-0.23047502, 0.19378755, -0.60462453, 0.10328553, 0.07470466,
-0.02441504, -0.05822588, 0.15232009])), (0.06843686227401986, array([-0.14318658, 0.04643655, 0.75096561, -0.01871818, -0.01447591,
-0.17949933, -0.15331492, 0.12470425, -0.18178712, 0.24103735,
0.08539383, -0.11503619, -0.30647834, 0.303377 , 0.01375136,
-0.05306956, 0.20186967, 0.01268617])), (0.05461986976927837, array([-0.0828628 , 0.20019452, -0.16709712, 0.28143598, -0.07943558,
0.03452673, -0.05479317, 0.05603121, -0.09052048, 0.22743986,
0.22964435, -0.07799652, -0.29373764, -0.39319444, 0.018703 ,
0.06657886, 0.19928781, -0.65433912])), (0.037033767660817486, array([-0.0726179 , 0.10724336, -0.14959664, 0.57106977, -0.27821877,
-0.15992551, -0.07964294, 0.10786756, -0.10942836, 0.09982277,
0.22281907, -0.10900343, -0.17141906, 0.02011335, 0.03294455,
-0.04793937, -0.4623318 , 0.42822063])), (0.0207847620703778, array([-0.09622428, -0.51626269, 0.01898898, 0.33322286, -0.12902326,
-0.01166699, -0.11469443, 0.16135677, 0.52749911, 0.33662114,
-0.27522586, -0.14116456, 0.20003726, 0.05846814, -0.01294677,
-0.00989682, 0.13320412, -0.09029721])), (0.01882310874197125, array([-0.04812049, 0.57284054, 0.03610536, 0.22879799, -0.14716102,
0.07316285, -0.07821108, 0.04407891, 0.37834608, -0.43872853,
-0.38811808, -0.07791286, -0.13730499, 0.19530921, -0.0146432 ,
-0.00184353, 0.17379716, -0.03256948])), (0.013266176739548647, array([ 0.03017785, 0.1037472 , -0.02786872, -0.34570645, 0.15641945,
-0.05117524, -0.2901086 , 0.24869847, 0.54528818, -0.06552166,
0.54060673, -0.26733579, -0.05236284, -0.10433784, 0.01219561,
-0.0103511 , -0.08298814, 0.09385527])), (0.004410590045403887, array([ 0.00725499, -0.01056858, -0.01134463, 0.02337208, -0.01105698,
0.01481233, -0.76438461, -0.14265648, 0.00566774, 0.01746646,
-0.02711643, 0.62487892, -0.00222617, -0.02212909, 0.00254272,
0.01141135, -0.0427211 , -0.01348153])), (0.0027791009835487334, array([-4.75830215e-03, -2.91190097e-02, 8.49468526e-03, -2.56958203e-03,
-3.57653719e-03, 1.35208987e-02, -2.68058316e-01, -8.09360912e-01,
1.24450218e-03, 3.12646668e-02, -2.58420584e-02, -5.18284330e-01,
-5.07417276e-03, -5.10454494e-03, 3.46209019e-03, 6.63531149e-03,
-3.95143520e-02, 1.96927259e-04]))]
Eigenvalues in descending order:
[9.64104985312631, 3.298696269122248, 1.1973992330834702, 1.1718387078485022, 0.8599169977079716, 0.7841154723760503, 0.3726295376446502, 0.2568950194548444, 0.1267972928557123, 0.09180915364321345, 0.06843686227401986, 0.05461986976927837, 0.037033767660817486, 0.0207847620703778, 0.01882310874197125, 0.013266176739548647, 0.004410590045403887, 0.0027791009835487334]
# Share of the total variance carried by each component, largest first,
# and the running total of those shares.
tot = sum(eigenvalues)
var_explained = [(i / tot) for i in sorted(eigenvalues, reverse=True)]
cum_var_exp = np.cumsum(var_explained)

# 1-based component numbers for the x axis, derived from the data so the plot
# keeps working if the number of features changes.
component_ids = range(1, len(var_explained) + 1)

plt.bar(component_ids, var_explained, alpha=0.5, align='center', label='individual explained variance')
plt.step(component_ids, cum_var_exp, where='mid', label='cumulative explained variance')
plt.ylabel('Explained variance ratio')
plt.xlabel('Principal components')
plt.legend(loc='best')
plt.show()
# Project the standardised features onto 9 principal components.
# NOTE(review): both the StandardScaler and the PCA are fitted on the full
# dataset before the train/test split below, so the held-out rows influence
# the transformation applied to the training rows.
pca = PCA(n_components=9) # 18 standardised feature columns -> 9 components (the label column is not part of X_std)
X_pca = pca.fit_transform(X_std)
from sklearn import model_selection
# Same test_size and random_state as the earlier split on the full feature set.
X_train, X_test, y_train, y_test = model_selection.train_test_split(X_pca, y1, test_size=0.30, random_state=1)
# Refits the existing `clf` object on the PCA-reduced training data.
clf.fit(X_train, y_train)
SVC()
clf.score(X_test, y_test)
0.9330708661417323
y_pred = clf.predict(X_test)
accuracy_score(y_test, y_pred)
0.9330708661417323
pd.crosstab(y_test, y_pred)
| col_0 | bus | car | van |
|---|---|---|---|
| class | |||
| bus | 58 | 1 | 0 |
| car | 1 | 126 | 6 |
| van | 5 | 4 | 53 |
print(metrics.classification_report(y_test, y_pred))
precision recall f1-score support
bus 0.91 0.98 0.94 59
car 0.96 0.95 0.95 133
van 0.90 0.85 0.88 62
accuracy 0.93 254
macro avg 0.92 0.93 0.92 254
weighted avg 0.93 0.93 0.93 254
--- End----